library(Seurat)
library(Signac)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(magick)
library(knitr)
library(kableExtra)
library(devtools)
library(harmony)
library(patchwork)
library(kableExtra)
set.seed(123)
# Paths
path_to_obj <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/12.tonsil_multiome_without_cluster6n7_doublets_normalized.rds")
path_to_save <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/13.tonsil_multiome_bcells_without_doublets_normalized.rds")
tonsil_wnn_bcell <- readRDS(path_to_obj)
vars <- str_subset(colnames(tonsil_wnn_bcell@meta.data), "wsnn_res.0.075")
clusters_gg <- purrr::map(vars, function(x) {
p <- DimPlot(
tonsil_wnn_bcell,
group.by = x,
reduction = "wnn.umap",
pt.size = 0.1, label = T
)
p
})
clusters_gg
## [[1]]
tonsil_wnn_bcell<-FindSubCluster(
tonsil_wnn_bcell,
2,
graph.name="wsnn",
subcluster.name = "sub.cluster2",
resolution = 0.05,
algorithm = 1
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8497
## Number of edges: 287152
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9565
## Number of communities: 2
## Elapsed time: 1 seconds
vars <- str_subset(colnames(tonsil_wnn_bcell@meta.data), "^sub.cluster")
clusters_gg <- purrr::map(vars, function(x) {
p <- DimPlot(
tonsil_wnn_bcell,
group.by = x,
reduction = "wnn.umap",
pt.size = 0.1, label = T
)
p
})
clusters_gg
## [[1]]
##
## [[2]]
##
## [[3]]
tonsil_wnn_bcell$is_tcell <-
tonsil_wnn_bcell$wsnn_res.0.05 == "1" |
tonsil_wnn_bcell$wsnn_res.0.05 == "4" |
tonsil_wnn_bcell$wsnn_res.0.05 == "6" |
tonsil_wnn_bcell$wsnn_res.0.05 == "7" |
tonsil_wnn_bcell$sub.cluster2=="2_1"
tonsil_wnn_bcell <- subset(tonsil_wnn_bcell, subset = is_tcell == FALSE)
DimPlot(tonsil_wnn_bcell, reduction = "wnn.umap", group.by = "wsnn_res.0.05", label = TRUE, pt.size = 0.1)
We exclude the first dimension as this is typically correlated with sequencing depth Cells cluster completely separately in ATAC without harmony; so run harmony after SVD
RunSVD LSI
DefaultAssay(tonsil_wnn_bcell) <- "peaks"
tonsil_wnn_bcell <- RunTFIDF(tonsil_wnn_bcell)
## Performing TF-IDF normalization
tonsil_wnn_bcell <- FindTopFeatures(tonsil_wnn_bcell, min.cutoff = "q0")
tonsil_wnn_bcell <- RunSVD(tonsil_wnn_bcell)
## Running SVD
## Scaling cell embeddings
Compute the correlation between total counts and each reduced dimension component.
LSI component is typically highly correlated with sequencing depth. The first LSI component often captures sequencing depth (technical variation) rather than biological variation. If this is the case, the component should be removed from downstream analysis. We can assess the correlation between each LSI component and sequencing depth using the DepthCor() function:
For scRNA-seq data we don’t typically observe such a strong relationship between the first PC and sequencing depth, and so usually retain the first PC in downstream analyses.
DepthCor(tonsil_wnn_bcell)
Here we see there is a very strong correlation between the first LSI component and the total number of counts for the cell, so we will perform downstream steps without this component.
tonsil_wnn_bcell <- RunUMAP(
tonsil_wnn_bcell,
dims = 2:40,
reduction = "lsi",
reduction.name = "umap.atac",
reduction.key = "atacUMAP_"
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:58:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:58:57 Read 44633 rows and found 39 numeric columns
## 20:58:57 Using Annoy for neighbor search, n_neighbors = 30
## 20:58:57 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:59:03 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpK1PKZa/file1bcc3d8d23ba
## 20:59:03 Searching Annoy index using 1 thread, search_k = 3000
## 20:59:24 Annoy recall = 100%
## 20:59:25 Commencing smooth kNN distance calibration using 1 thread
## 20:59:28 Initializing from normalized Laplacian + noise
## 20:59:29 Commencing optimization for 200 epochs, with 1909944 positive edges
## 20:59:58 Optimization finished
atac.umap<-DimPlot(
tonsil_wnn_bcell,
reduction = "umap.atac",
group.by = "library_name",
pt.size = 0.1
) + ggtitle('scATAC UMAP') + NoLegend()
atac.umap
#split_by: library ,edad, genero
DefaultAssay(tonsil_wnn_bcell) <- "RNA"
tonsil_wnn_bcell <- NormalizeData(
tonsil_wnn_bcell,
normalization.method = "LogNormalize",
scale.factor = 1e4
)
tonsil_wnn_bcell <- tonsil_wnn_bcell %>%
FindVariableFeatures(nfeatures = 3000) %>%
ScaleData() %>%
RunPCA()
PCAPlot(tonsil_wnn_bcell,
group.by = "library_name")
ElbowPlot(object = tonsil_wnn_bcell)
find variable genes
tonsil_wnn_bcell <- RunUMAP(
tonsil_wnn_bcell,
dims = 1:30,
reduction = "pca",
reduction.name = "umap.rna",
reduction.key = "rnaUMAP_"
)
## 21:01:38 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:01:38 Read 44633 rows and found 30 numeric columns
## 21:01:38 Using Annoy for neighbor search, n_neighbors = 30
## 21:01:38 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:01:42 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpK1PKZa/file1bcc28678f3
## 21:01:42 Searching Annoy index using 1 thread, search_k = 3000
## 21:01:58 Annoy recall = 100%
## 21:01:59 Commencing smooth kNN distance calibration using 1 thread
## 21:02:01 Initializing from normalized Laplacian + noise
## 21:02:03 Commencing optimization for 200 epochs, with 1957252 positive edges
## 21:02:30 Optimization finished
rna.umap<-DimPlot(
tonsil_wnn_bcell,
reduction = "umap.rna",
group.by = "library_name",
pt.size = 0.1) + NoLegend() + ggtitle('scRNA UMAP')
rna.umap
atac.umap + rna.umap
hacer primero harmony, quitar batch effect. atac y rna. harmony
Pass the Seurat object to the RunHarmony function and specify which variable to integrate out. A Seurat object is returned with corrected Harmony coordinates.
DefaultAssay(tonsil_wnn_bcell) <- "peaks"
tonsil_wnn_bcell <- RunHarmony(
object = tonsil_wnn_bcell,
reduction = "lsi",
dims = 2:40,
group.by.vars = "library_name",
assay.use = "peaks",
project.dim = FALSE,
reduction.save = "harmony_peaks"
)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony 10/10
## Harmony converged after 10 iterations
tonsil_wnn_bcell <- RunUMAP(
tonsil_wnn_bcell,
dims = 2:40,
reduction = "harmony_peaks",
reduction.name = "umap.atac",
reduction.key = "atacUMAP_"
)
## 21:06:35 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:06:35 Read 44633 rows and found 39 numeric columns
## 21:06:35 Using Annoy for neighbor search, n_neighbors = 30
## 21:06:35 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:06:39 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpK1PKZa/file1bcc4ab2bd10
## 21:06:39 Searching Annoy index using 1 thread, search_k = 3000
## 21:06:59 Annoy recall = 100%
## 21:07:00 Commencing smooth kNN distance calibration using 1 thread
## 21:07:02 Initializing from normalized Laplacian + noise
## 21:07:04 Commencing optimization for 200 epochs, with 1974986 positive edges
## 21:07:34 Optimization finished
Harm_peak<-DimPlot(
tonsil_wnn_bcell,
reduction = "umap.atac",
group.by = "library_name",
pt.size = 0.1
) + NoLegend() + ggtitle('Peak Harmony')
tonsil_wnn_bcell <- RunUMAP(
tonsil_wnn_bcell,
dims = 2:40,
reduction = "harmony_rna",
reduction.name = "umap.rna",
reduction.key = "rnaUMAP_"
)
## 21:09:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:09:07 Read 44633 rows and found 39 numeric columns
## 21:09:07 Using Annoy for neighbor search, n_neighbors = 30
## 21:09:07 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:09:12 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpK1PKZa/file1bcc476a40c5
## 21:09:12 Searching Annoy index using 1 thread, search_k = 3000
## 21:09:28 Annoy recall = 100%
## 21:09:29 Commencing smooth kNN distance calibration using 1 thread
## 21:09:32 Initializing from normalized Laplacian + noise
## 21:09:33 Commencing optimization for 200 epochs, with 2011620 positive edges
## 21:10:01 Optimization finished
Harm_rna<-DimPlot(
tonsil_wnn_bcell,
reduction = "umap.rna",
group.by = "library_name",
pt.size = 0.1
) + NoLegend() + ggtitle('RNA Harmony')
Harm_peak+Harm_rna
FindNeighbors
Constructs a Shared Nearest Neighbor (SNN) Graph for a given dataset. We first determine the k-nearest neighbors of each cell. We use this knn graph to construct the SNN graph by calculating the neighborhood overlap (Jaccard index) between every cell and its k.param nearest neighbors.
# build a joint neighbor graph using both assays
tonsil_wnn_bcell <- FindMultiModalNeighbors(
object = tonsil_wnn_bcell,
reduction.list = list("harmony_peaks", "harmony_rna"),
dims.list = list(2:40, 1:30), modality.weight.name = "Joint_snn_umap"
)
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Warning in FindMultiModalNeighbors(object = tonsil_wnn_bcell, reduction.list
## = list("harmony_peaks", : The number of provided modality.weight.name is not
## equal to the number of modalities. peaks.weight RNA.weight are used to store the
## modality weights
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
# build a joint UMAP visualization
tonsil_wnn_bcell <- RunUMAP(
object = tonsil_wnn_bcell,
nn.name = "weighted.nn",
reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_")
## 21:12:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:12:47 Commencing smooth kNN distance calibration using 1 thread
## 21:12:49 Initializing from normalized Laplacian + noise
## 21:12:50 Commencing optimization for 200 epochs, with 1412680 positive edges
## 21:13:19 Optimization finished
joint.umap<- DimPlot(tonsil_wnn_bcell, label = FALSE, group.by = "library_name", pt.size = 0.1, reduction = "wnn.umap") + plot_annotation(title = 'Joint UMAP')+ ggtitle('Joint UMAP by library name') + NoLegend()
joint.umap
joint.umap_age<- DimPlot(tonsil_wnn_bcell, label = FALSE, split.by = "age_group", pt.size = 0.1, reduction = "wnn.umap") + plot_annotation(title = 'Joint UMAP ')+ ggtitle('Joint UMAP age ')
joint.umap_hospital<- DimPlot(tonsil_wnn_bcell, label = FALSE, split.by = "hospital", pt.size = 0.1, reduction = "wnn.umap") + plot_annotation(title = 'Joint UMAP ')+ ggtitle('Joint UMAP age ')
joint.umap_age
joint.umap_hospital
#find cluster algorithm 3 = SLM algorithm
tonsil_wnn_bcell <- FindClusters(tonsil_wnn_bcell, resolution = c(0.005,0.01,0.05,0.075,0.1,0.25,0.5),algorithm = 3, graph.name = "wsnn",verbose = FALSE)
print(colnames(tonsil_wnn_bcell@meta.data))
## [1] "lib_name_barcode" "orig.ident" "nCount_RNA"
## [4] "nFeature_RNA" "nCount_ATAC" "nFeature_ATAC"
## [7] "nucleosome_signal" "nucleosome_percentile" "TSS.enrichment"
## [10] "TSS.percentile" "tss.level" "percent.mt"
## [13] "percent_ribo" "nCount_peaks" "nFeature_peaks"
## [16] "library_name" "donor_id" "sex"
## [19] "age" "age_group" "hospital"
## [22] "assay" "barcodes" "doublet_scores"
## [25] "predicted_doublets" "peaks.weight" "RNA.weight"
## [28] "wsnn_res.0.005" "wsnn_res.0.01" "seurat_clusters"
## [31] "sub.cluster_0.25" "sub.cluster0_0.5" "is_doublet"
## [34] "wsnn_res.0.05" "wsnn_res.0.75" "wsnn_res.0.075"
## [37] "is_tcell" "sub.cluster2" "wsnn_res.0.1"
## [40] "wsnn_res.0.25" "wsnn_res.0.5"
vars <- str_subset(colnames(tonsil_wnn_bcell@meta.data), "^wsnn_res")
clusters_gg <- purrr::map(vars, function(x) {
p <- DimPlot(
tonsil_wnn_bcell,
group.by = x,
reduction = "wnn.umap",
pt.size = 0.1, label = T
)
p
})
clusters_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
Idents(tonsil_wnn_bcell)<-"wsnn_res.0.05"
tonsil_markers_05<-FindAllMarkers(object = tonsil_wnn_bcell, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
write.csv(tonsil_markers_05,file=paste0("~/Documents/multiome_tonsil_Lucia/results/tables/", "tonsil_markers_bcell_05.csv"))
Resolution 0.01
tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) %>% write.csv(.,file=paste0("~/Documents/multiome_tonsil_Lucia/results/tables/", "top10_tonsil_markers_bcell_05.csv"))
tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) %>% write.csv(.,file=paste0("~/Documents/multiome_tonsil_Lucia/results/tables/", "top5_tonsil_markers_bcell_05.csv"))
top5_tonsil_markers_05<-tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top10_tonsil_markers_05<-tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
df_top5<-as.data.frame(top5_tonsil_markers_05)
kbl(df_top5,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
kable_paper("striped", full_width = F)
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| 0 | 2.410092 | 0.992 | 0.796 | 0 | 0 | BANK1 |
| 0 | 2.383726 | 0.616 | 0.159 | 0 | 0 | COL19A1 |
| 0 | 2.099433 | 0.803 | 0.332 | 0 | 0 | CAMK1D |
| 0 | 2.063847 | 0.727 | 0.280 | 0 | 0 | MAML2 |
| 0 | 1.955181 | 0.837 | 0.318 | 0 | 0 | CELF2 |
| 0 | 2.935490 | 0.956 | 0.158 | 0 | 1 | HMGB2 |
| 0 | 2.855373 | 0.968 | 0.243 | 0 | 1 | TUBA1B |
| 0 | 2.707539 | 0.968 | 0.254 | 0 | 1 | H2AFZ |
| 0 | 2.468852 | 0.854 | 0.328 | 0 | 1 | HIST1H4C |
| 0 | 2.453695 | 0.816 | 0.038 | 0 | 1 | TOP2A |
| 0 | 2.641706 | 0.837 | 0.248 | 0 | 2 | MAML3 |
| 0 | 2.527205 | 0.986 | 0.297 | 0 | 2 | AC023590.1 |
| 0 | 2.441617 | 0.823 | 0.167 | 0 | 2 | AC104170.1 |
| 0 | 2.225239 | 0.933 | 0.255 | 0 | 2 | RAPGEF5 |
| 0 | 2.152638 | 0.799 | 0.247 | 0 | 2 | LHFPL2 |
| 0 | 2.507478 | 0.891 | 0.027 | 0 | 3 | FYB1 |
| 0 | 2.446032 | 0.831 | 0.029 | 0 | 3 | INPP4B |
| 0 | 2.048815 | 0.740 | 0.005 | 0 | 3 | THEMIS |
| 0 | 1.892236 | 0.894 | 0.138 | 0 | 3 | PRKCH |
| 0 | 1.809279 | 0.688 | 0.012 | 0 | 3 | IL7R |
| 0 | 5.710049 | 0.498 | 0.151 | 0 | 4 | IGHGP |
| 0 | 5.576323 | 0.828 | 0.473 | 0 | 4 | IGLC1 |
| 0 | 5.654174 | 0.961 | 0.918 | 0 | 4 | IGKC |
| 0 | 6.007131 | 0.647 | 0.436 | 0 | 4 | IGHA1 |
| 0 | 5.590649 | 0.895 | 0.761 | 0 | 4 | IGLC2 |
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
tonsil_wnn_bcell <- CellCycleScoring(tonsil_wnn_bcell, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
head(tonsil_wnn_bcell[[]])
## lib_name_barcode orig.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL_15_T_1_AAACAGCCAGCAACCT-1 SeuratProject
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL_15_T_1_AAACAGCCAGCTTAGC-1 SeuratProject
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL_15_T_1_AAACATGCAGGCCAAA-1 SeuratProject
## BCLL_15_T_1_AAACCAACACGAATTT-1 BCLL_15_T_1_AAACCAACACGAATTT-1 SeuratProject
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 BCLL_15_T_1_AAACCGAAGCTATGAC-1 SeuratProject
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 BCLL_15_T_1_AAACCGAAGTAAAGGT-1 SeuratProject
## nCount_RNA nFeature_RNA nCount_ATAC
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 2938 1493 14475
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 5693 2527 14100
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 2377 1121 12678
## BCLL_15_T_1_AAACCAACACGAATTT-1 6476 2543 11978
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 2285 1078 16821
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 5027 2220 15923
## nFeature_ATAC nucleosome_signal
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 6069 0.9178862
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 5960 0.7073955
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 5233 0.5805921
## BCLL_15_T_1_AAACCAACACGAATTT-1 5077 0.5724638
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 6613 0.5307644
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 6633 0.6731493
## nucleosome_percentile TSS.enrichment
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0.88 4.890692
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 0.44 3.685808
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0.15 5.826994
## BCLL_15_T_1_AAACCAACACGAATTT-1 0.14 5.295245
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0.08 5.239743
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 0.35 4.178657
## TSS.percentile tss.level percent.mt percent_ribo
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0.31 High 9.326072 5.616065
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 0.03 High 3.249605 4.180573
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0.73 High 13.378208 19.099706
## BCLL_15_T_1_AAACCAACACGAATTT-1 0.50 High 2.424336 2.362569
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0.48 High 14.529540 15.229759
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 0.09 High 5.390889 2.048936
## nCount_peaks nFeature_peaks library_name
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 7403 6067 BCLL_15_T_1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 7876 6443 BCLL_15_T_1
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 5989 4857 BCLL_15_T_1
## BCLL_15_T_1_AAACCAACACGAATTT-1 5910 4926 BCLL_15_T_1
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 8042 6239 BCLL_15_T_1
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 8606 6978 BCLL_15_T_1
## donor_id sex age age_group hospital assay
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL-15-T male 33 young_adult CIMA multiome
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL-15-T male 33 young_adult CIMA multiome
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL-15-T male 33 young_adult CIMA multiome
## BCLL_15_T_1_AAACCAACACGAATTT-1 BCLL-15-T male 33 young_adult CIMA multiome
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 BCLL-15-T male 33 young_adult CIMA multiome
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 BCLL-15-T male 33 young_adult CIMA multiome
## barcodes doublet_scores
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 AAACAGCCAGCAACCT-1 0.020
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 AAACAGCCAGCTTAGC-1 0.024
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 AAACATGCAGGCCAAA-1 0.019
## BCLL_15_T_1_AAACCAACACGAATTT-1 AAACCAACACGAATTT-1 0.015
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 AAACCGAAGCTATGAC-1 0.020
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 AAACCGAAGTAAAGGT-1 0.016
## predicted_doublets peaks.weight RNA.weight
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 FALSE 0.5213808 0.4786192
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 FALSE 0.4637829 0.5362171
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 FALSE 0.5030760 0.4969240
## BCLL_15_T_1_AAACCAACACGAATTT-1 FALSE 0.5155517 0.4844483
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 FALSE 0.5918682 0.4081318
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 FALSE 0.4687815 0.5312185
## wsnn_res.0.005 wsnn_res.0.01 seurat_clusters
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0 0 1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 1 1 2
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0 0 11
## BCLL_15_T_1_AAACCAACACGAATTT-1 0 0 9
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0 0 0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 1 1 2
## sub.cluster_0.25 sub.cluster0_0.5 is_doublet
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0 0_4 FALSE
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 2 2 FALSE
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0 0_0 FALSE
## BCLL_15_T_1_AAACCAACACGAATTT-1 0 0_3 FALSE
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0 0_0 FALSE
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 2 2 FALSE
## wsnn_res.0.05 wsnn_res.0.75 wsnn_res.0.075
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0 1 0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 2 4 3
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0 0 1
## BCLL_15_T_1_AAACCAACACGAATTT-1 0 9 0
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0 0 1
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 2 4 3
## is_tcell sub.cluster2 wsnn_res.0.1 wsnn_res.0.25
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 FALSE 0 0 1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 FALSE 3 3 3
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 FALSE 0 1 0
## BCLL_15_T_1_AAACCAACACGAATTT-1 FALSE 0 0 6
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 FALSE 0 1 0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 FALSE 3 3 3
## wsnn_res.0.5 S.Score G2M.Score Phase
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 1 -0.032117648 -0.11812569 G1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 2 -0.007383767 -0.18545173 G1
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 11 -0.034289719 -0.08897424 G1
## BCLL_15_T_1_AAACCAACACGAATTT-1 9 -0.134934431 -0.14585202 G1
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0 -0.106464784 -0.13573257 G1
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 2 -0.143077150 -0.12467942 G1
## old.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 2
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 0
## BCLL_15_T_1_AAACCAACACGAATTT-1 0
## BCLL_15_T_1_AAACCGAAGCTATGAC-1 0
## BCLL_15_T_1_AAACCGAAGTAAAGGT-1 2
print(x = tonsil_wnn_bcell[["pca"]],
dims = 1:10,
nfeatures = 5)
## PC_ 1
## Positive: TXNIP, MAML2, COL19A1, BCL2, AC105402.3
## Negative: STMN1, MKI67, HMGB2, MYBL1, H2AFZ
## PC_ 2
## Positive: AC104170.1, TOX, MAML3, LHFPL2, CCDC144A
## Negative: TOP2A, AURKB, UBE2C, ASPM, GTSE1
## PC_ 3
## Positive: FNDC3B, ERN1, DERL3, THEMIS, FYB1
## Negative: AC104170.1, MAML3, FGD6, RAPGEF5, AC023590.1
## PC_ 4
## Positive: FKBP11, MZB1, XBP1, DERL3, SSR4
## Negative: THEMIS, BCL11B, FYB1, INPP4B, IL7R
## PC_ 5
## Positive: KIF14, CENPE, PLK1, GAS2L3, DEPDC1
## Negative: HSP90AB1, MCM4, PCNA, DTL, FABP5
## PC_ 6
## Positive: BRIP1, E2F7, AC105402.3, CLSPN, RRM2
## Negative: CDC20, PLK1, ACTG1, CCNB1, RPL7
## PC_ 7
## Positive: FCRL4, ATP8B4, AL355076.2, DNAH8, ITGAX
## Negative: IGHM, TCL1A, DPYD, COL19A1, IGHD
## PC_ 8
## Positive: MGLL, CD83, SKAP1, AC117480.1, NTNG1
## Negative: SOX5, SUGCT, BMP7, ZNF385B, WNK2
## PC_ 9
## Positive: ANK3, TBXAS1, HIPK2, TEX9, FKBP5
## Negative: FCRL4, FCRL5, IGHM, ZEB2, PTPRJ
## PC_ 10
## Positive: ZNF804A, CCSER1, RHEX, MAST4, AC105402.3
## Negative: HLA-DQA2, IGHGP, MACROD2, DHRS9, PAG1
PCNA: Proliferating cell nuclear antigen
# Visualize the distribution of cell cycle markers across
RidgePlot(tonsil_wnn_bcell, features = c("PCNA", "TOP2A", "MCM6", "MKI67"), ncol = 2)
## Picking joint bandwidth of 0.0824
## Picking joint bandwidth of 0.0756
## Picking joint bandwidth of 0.0616
## Picking joint bandwidth of 0.072
tonsil_wnn_bcell <- RunPCA(tonsil_wnn_bcell, features = c(s.genes, g2m.genes))
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 19 features requested have not been scaled (running reduction without
## them): UNG, PRIM1, UHRF1, MLF1IP, RFC2, RPA2, UBR7, MSH2, RAD51, TIPIN, BLM,
## CASP8AP2, USP1, CHAF1B, FAM64A, HN1, RANGAP1, PSRC1, CTCF
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large
## a percentage of total singular values, use a standard svd instead.
## PC_ 1
## Positive: MKI67, TOP2A, TPX2, HMGB2, NUSAP1, CENPF, CDK1, CENPE, AURKB, GTSE1
## ANLN, NDC80, TUBB4B, BUB1, KIF11, HMMR, BIRC5, DLGAP5, SMC4, UBE2C
## RRM2, CDCA2, NUF2, ECT2, CDCA3, KIF23, CDCA8, KIF2C, CKAP2L, CCNB2
## Negative: POLD3, POLA1, MCM5, CCNE2, G2E3, SLBP, CDC6, MCM2, EXO1, MCM6
## NASP, DTL, GINS2, DSCC1, GAS2L3, CDC45, CENPA, HELLS, ATAD2, CKAP2
## WDR76, TYMS, NEK2, LBR, CBX5, GMNN, MCM4, FEN1, RAD51AP1, TMPO
## PC_ 2
## Positive: MCM4, CLSPN, HELLS, DTL, PCNA, CDC45, GINS2, CDC6, MCM6, WDR76
## BRIP1, ATAD2, POLA1, EXO1, CCNE2, FEN1, SLBP, MCM5, MCM2, RRM2
## E2F8, GMNN, DSCC1, POLD3, RRM1, NASP, TYMS, CDCA7, RAD51AP1, CBX5
## Negative: GAS2L3, AURKA, CENPE, HMMR, CDC20, UBE2C, NEK2, DLGAP5, CENPF, CENPA
## KIF23, CCNB2, CDCA8, TPX2, BUB1, TOP2A, CDCA3, TTK, HJURP, G2E3
## CKAP2L, GTSE1, CKS2, CKAP2, CDC25C, ECT2, NUF2, CKAP5, KIF2C, TUBB4B
## PC_ 3
## Positive: ANLN, E2F8, RRM2, CDC25C, NDC80, RAD51AP1, KIF11, ECT2, TMPO, BRIP1
## HJURP, CKAP2L, CDCA2, KIF23, EXO1, G2E3, GTSE1, TTK, CDK1, DSCC1
## ATAD2, BUB1, CKAP5, MKI67, TYMS, KIF20B, SMC4, POLA1, RRM1, NUSAP1
## Negative: CDC20, CCNB2, CKS2, CKS1B, NEK2, BIRC5, GINS2, HMGB2, TUBB4B, DTL
## NASP, MCM2, ANP32E, MCM5, MCM4, MCM6, SLBP, CENPF, AURKA, CDCA7
## UBE2C, CDC6, CDCA3, GMNN, HMMR, PCNA, CBX5, CDC45, LBR, CENPA
## PC_ 4
## Positive: TYMS, FEN1, CKS1B, E2F8, RRM2, RRM1, AURKB, PCNA, CDCA3, TUBB4B
## ANP32E, CKS2, RAD51AP1, UBE2C, HMGB2, BIRC5, MCM2, NDC80, MKI67, GTSE1
## CDK1, DSCC1, TOP2A, KIF2C, NCAPD2, HJURP, NUSAP1, GMNN, CDCA8, CKAP2L
## Negative: G2E3, GAS2L3, DTL, POLA1, LBR, KIF20B, CDCA7, BRIP1, CENPA, NEK2
## POLD3, HELLS, MCM6, CKAP2, WDR76, CKAP5, CDC45, EXO1, CDCA2, ECT2
## CDC6, TTK, CENPE, AURKA, CCNB2, SMC4, HMMR, ATAD2, CENPF, CCNE2
## PC_ 5
## Positive: LBR, G2E3, CBX5, SLBP, NCAPD2, CDCA7, WDR76, NASP, ANP32E, TMPO
## SMC4, CKAP5, CKS2, TACC3, HMGB2, MCM5, KIF11, HELLS, POLA1, NUSAP1
## RRM1, MCM2, ATAD2, E2F8, MKI67, RRM2, TPX2, BRIP1, NUF2, KIF20B
## Negative: POLD3, EXO1, CDC6, GAS2L3, DTL, NEK2, CENPA, CDC45, CKAP2, CCNE2
## AURKA, RAD51AP1, TTK, CDC20, MCM4, HMMR, CDK1, KIF2C, DLGAP5, GINS2
## ECT2, CLSPN, MCM6, CENPE, HJURP, UBE2C, CDC25C, KIF23, NDC80, CDCA8
tonsil_wnn_bcell <- RunUMAP(object = tonsil_wnn_bcell,
nn.name = "weighted.nn",
reduction.name = "wnn.umap",
reduction.key = "wnnUMAP_" )
## 21:28:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:28:28 Commencing smooth kNN distance calibration using 1 thread
## 21:28:30 Initializing from normalized Laplacian + noise
## 21:28:31 Commencing optimization for 200 epochs, with 1412680 positive edges
## 21:29:02 Optimization finished
DimPlot(tonsil_wnn_bcell,
reduction = "wnn.umap",
pt.size = 0.1, label = T, split.by = "age_group")
DimPlot(tonsil_wnn_bcell,
reduction = "wnn.umap",
pt.size = 0.1, label = T)
saveRDS(tonsil_wnn_bcell, path_to_save)
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] es_ES.UTF-8/es_ES.UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 harmony_0.1.0 Rcpp_1.0.8 devtools_2.4.3
## [5] usethis_2.1.5 kableExtra_1.3.4 knitr_1.37 magick_2.7.3
## [9] ggpubr_0.4.0 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8
## [13] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.6
## [17] ggplot2_3.3.5 tidyverse_1.3.1 Signac_1.5.0 SeuratObject_4.0.4
## [21] Seurat_4.1.0 BiocStyle_2.22.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.24 tidyselect_1.1.1
## [4] htmlwidgets_1.5.4 grid_4.1.2 docopt_0.7.1
## [7] BiocParallel_1.28.3 Rtsne_0.15 munsell_0.5.0
## [10] codetools_0.2-18 ica_1.0-2 future_1.23.0
## [13] miniUI_0.1.1.1 withr_2.4.3 colorspace_2.0-2
## [16] highr_0.9 rstudioapi_0.13 stats4_4.1.2
## [19] ROCR_1.0-11 ggsignif_0.6.3 tensor_1.5
## [22] listenv_0.8.0 labeling_0.4.2 slam_0.1-49
## [25] GenomeInfoDbData_1.2.7 polyclip_1.10-0 farver_2.1.0
## [28] rprojroot_2.0.2 parallelly_1.30.0 vctrs_0.3.8
## [31] generics_0.1.2 xfun_0.29 lsa_0.73.2
## [34] ggseqlogo_0.1 R6_2.5.1 GenomeInfoDb_1.30.0
## [37] cachem_1.0.6 bitops_1.0-7 spatstat.utils_2.3-0
## [40] assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1
## [43] gtable_0.3.0 globals_0.14.0 processx_3.5.2
## [46] goftest_1.2-3 rlang_1.0.1 systemfonts_1.0.3
## [49] RcppRoll_0.3.0 splines_4.1.2 rstatix_0.7.0
## [52] lazyeval_0.2.2 spatstat.geom_2.3-1 broom_0.7.12
## [55] BiocManager_1.30.16 yaml_2.2.2 reshape2_1.4.4
## [58] abind_1.4-5 modelr_0.1.8 backports_1.4.1
## [61] httpuv_1.6.5 tools_4.1.2 bookdown_0.24
## [64] ellipsis_0.3.2 spatstat.core_2.3-2 jquerylib_0.1.4
## [67] RColorBrewer_1.1-2 BiocGenerics_0.40.0 sessioninfo_1.2.2
## [70] ggridges_0.5.3 plyr_1.8.6 zlibbioc_1.40.0
## [73] RCurl_1.98-1.5 prettyunits_1.1.1 ps_1.6.0
## [76] rpart_4.1-15 deldir_1.0-6 pbapply_1.5-0
## [79] cowplot_1.1.1 S4Vectors_0.32.3 zoo_1.8-9
## [82] haven_2.4.3 ggrepel_0.9.1 cluster_2.1.2
## [85] fs_1.5.2 magrittr_2.0.2 RSpectra_0.16-0
## [88] data.table_1.14.2 scattermore_0.7 lmtest_0.9-39
## [91] reprex_2.0.1 RANN_2.6.1 SnowballC_0.7.0
## [94] fitdistrplus_1.1-6 matrixStats_0.61.0 pkgload_1.2.4
## [97] hms_1.1.1 mime_0.12 evaluate_0.14
## [100] xtable_1.8-4 sparsesvd_0.2 readxl_1.3.1
## [103] IRanges_2.28.0 gridExtra_2.3 testthat_3.1.1
## [106] compiler_4.1.2 KernSmooth_2.23-20 crayon_1.5.0
## [109] htmltools_0.5.2 mgcv_1.8-38 later_1.3.0
## [112] tzdb_0.2.0 lubridate_1.8.0 DBI_1.1.2
## [115] tweenr_1.0.2 dbplyr_2.1.1 MASS_7.3-54
## [118] Matrix_1.3-4 car_3.0-12 cli_3.1.1
## [121] parallel_4.1.2 igraph_1.2.11 GenomicRanges_1.46.1
## [124] pkgconfig_2.0.3 plotly_4.10.0 spatstat.sparse_2.1-0
## [127] xml2_1.3.3 svglite_2.0.0 bslib_0.3.1
## [130] webshot_0.5.2 XVector_0.34.0 rvest_1.0.2
## [133] callr_3.7.0 digest_0.6.29 sctransform_0.3.3
## [136] RcppAnnoy_0.0.19 spatstat.data_2.1-2 Biostrings_2.62.0
## [139] rmarkdown_2.11 cellranger_1.1.0 leiden_0.3.9
## [142] fastmatch_1.1-3 uwot_0.1.11 shiny_1.7.1
## [145] Rsamtools_2.10.0 lifecycle_1.0.1 nlme_3.1-153
## [148] jsonlite_1.7.3 carData_3.0-4 limma_3.50.0
## [151] desc_1.4.0 viridisLite_0.4.0 fansi_1.0.2
## [154] pillar_1.7.0 lattice_0.20-45 pkgbuild_1.3.0
## [157] fastmap_1.1.0 httr_1.4.2 survival_3.2-13
## [160] remotes_2.4.2 glue_1.6.1 qlcMatrix_0.9.7
## [163] png_0.1-7 ggforce_0.3.3 stringi_1.7.6
## [166] sass_0.4.0 memoise_2.0.1 irlba_2.3.5
## [169] future.apply_1.8.1